"""
/*
 * Copyright 2011 OpenWAF.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
 """

import sys
class THelper:
    """Character-classification helpers for the HTML/JSP-style tokenizer."""

    # Whitespace: LF, space, TAB, FF, CR.
    ws_char = [chr(0x0A), chr(0x20), chr(0x09), chr(0x0C), chr(0x0D)]
    # Single characters that always delimit/form their own token.
    sp_char = ['>', '<', '/', '%', '=']
    # Recognized operators, grouped by length (longest is 4: '<!--').
    opr_1 = ['=', '>', '<', '/']
    opr_2 = ['/>', '<%', '%>', '</', '<!']
    opr_3 = ['<!-']
    opr_4 = ['<!--']

    @staticmethod
    def is_operator(s):
        """Return True if *s* is exactly one of the recognized operator strings.

        Safe on any input: returns False for None, the empty string, and
        strings longer than the longest operator (4 chars).  The original
        crashed on '' (s[0] IndexError) and on None (len(None) TypeError).
        """
        if not s:
            return False
        slen = len(s)
        if slen > 4:
            return False
        # Operators never start with a letter or digit; cheap early reject.
        c = s[0]
        if c.isalpha() or c.isdigit():
            return False
        if slen == 1:
            return s in THelper.opr_1
        if slen == 2:
            return s in THelper.opr_2
        if slen == 3:
            return s in THelper.opr_3
        return s in THelper.opr_4

    @staticmethod
    def is_sp_char(s):
        """Return True if *s* is one of the special single characters."""
        return s in THelper.sp_char

    @staticmethod
    def is_whitespace(s):
        """Return True if *s* is a recognized whitespace character."""
        return s in THelper.ws_char
class Stream:
    """Character stream over a file or in-memory data, with lookahead.

    `peek`/`peeks` look ahead without consuming input; `read`/`reads`
    consume input and implicitly discard any outstanding peek state.
    `line_number`/`line_offset` track the current 1-based position.
    """

    def __init__(self, fname, its_a_data=False):
        # When its_a_data is True, `fname` IS the data, not a path.
        if its_a_data == False:
            # Context manager: the original leaked the file handle.
            with open(fname, "rb") as f:
                self.data = f.read()
        else:
            self.data = fname
        if isinstance(self.data, bytes):
            # Python 3: indexing bytes yields ints, which silently breaks
            # every char comparison in the tokenizer.  latin-1 maps each
            # byte 1:1 to a char, so no decode error is possible.
            self.data = self.data.decode("latin-1")
        self.max_len = len(self.data)
        self.cur_pos = 0
        self.peeked = False
        self.peek_count = 0
        self.line_offset = 1
        self.line_number = 1

    def peek(self):
        """Return the next not-yet-peeked char, or None at end of data."""
        self.peeked = True
        if self.cur_pos + self.peek_count >= self.max_len:
            return None
        c = self.data[self.cur_pos + self.peek_count]
        self.peek_count += 1
        return c

    def peeks(self, count):
        """Peek the next `count` chars as one string, or None if fewer remain."""
        self.peeked = True
        start = self.cur_pos + self.peek_count
        end = start + count
        # was `end >= max_len`: off-by-one that made it impossible to peek
        # a span ending exactly at EOF (e.g. a closing '%>' at end of file).
        if end > self.max_len:
            return None
        self.peek_count += count
        return self.data[start:end]

    def read(self):
        """Consume and return one char (tracking line/col), or None at EOF."""
        if self.peeked:  # avoiding function call to reset
            self.peeked = False
            self.peek_count = 0
        if self.cur_pos >= self.max_len:
            return None
        c = self.data[self.cur_pos]
        self.cur_pos += 1
        self.line_offset += 1
        if c == '\n':
            self.line_number += 1
            self.line_offset = 1
        return c

    def reads(self, count):
        """Consume and return the next `count` chars, or None if fewer remain.

        NOTE(review): does not update line_number for newlines in the span.
        """
        if self.peeked:
            self.peeked = False
            self.peek_count = 0
        start = self.cur_pos
        end = start + count
        # was `end >= max_len`: refused to consume the final chars of data.
        if end > self.max_len:
            return None
        self.line_offset += count
        self.cur_pos = end
        # was data[cur_pos:cur_pos+count] AFTER advancing cur_pos, i.e. the
        # span FOLLOWING the one just consumed.
        return self.data[start:end]

    def reset(self):
        """Discard any outstanding peek state."""
        if not self.peeked:
            return
        self.peeked = False
        self.peek_count = 0

class TokenStream:
    """Cursor over a token list that transparently skips comment tokens."""

    def __init__(self, tokens, filepath):
        self.filepath = filepath
        self.tokens = tokens
        self.maxIndex = len(tokens)
        self.curIndex = 0

    def getCurrentTokenAsItIs(self):
        # Raw access: no comment skipping, no bounds check.
        return self.tokens[self.curIndex]

    def getCurrentToken(self):
        # Advance past comment tokens ("/*...", "//...") and return the
        # first non-comment token, or None once the stream is exhausted.
        while self.curIndex < self.maxIndex:
            candidate = self.tokens[self.curIndex]
            text = candidate.data
            if text.startswith("/*") or text.startswith("//"):
                self.curIndex += 1
            else:
                return candidate
        return None

    def getCurrentIndex(self):
        return self.curIndex

    def setCurrentIndex(self, index):
        self.curIndex = index

    def getNextToken(self):
        # Step one token forward, then resolve to the next non-comment token.
        self.curIndex += 1
        return self.getCurrentToken()

    def isEnd(self):
        return self.curIndex >= self.maxIndex


class Token:
    """A lexical token: its text plus the source position recorded at capture."""

    def __init__(self, s, lineno, pos):
        self.data = s          # token text
        self.lineno = lineno   # 1-based source line
        self.pos = pos         # offset within the line at capture time

    def __str__(self):
        return self.data
def tokenizeData(data):
    """Tokenize an in-memory string rather than a file on disk."""
    return tokenizeStream(Stream(data, True), True)
def tokenize(filepath, pws=False):
    """Tokenize the file at *filepath*; *pws* is forwarded to tokenizeStream."""
    stream = Stream(filepath)
    return tokenizeStream(stream, pws)
def tokenizeStream(s, pws=False):
    """Tokenize a Stream into a list of Token objects.

    Recognizes: whitespace (each char its own token), '<% ... %>' blocks,
    '<!-- ... -->' comments, single- and double-quoted strings (with
    backslash escapes), multi-char operators, special single characters,
    and plain words.

    NOTE(review): `pws` is accepted but never consulted here; whitespace
    tokens are always emitted.  Kept for interface compatibility.
    """
    tokens = []
    while True:
        c = s.read()
        if c == None or len(c) == 0:
            break
        # Up to three chars of lookahead; consumed lazily via peek state.
        nc = s.peek()
        nc1 = s.peek()
        nc2 = s.peek()
        if THelper.is_whitespace(c):
            tokens.append(Token(c, s.line_number, s.line_offset))
            continue
        elif c == '<' and nc == '%':
            # '<% ... %>' server-side block captured as one token.
            data = "<%"
            s.read()
            while True:
                pv = s.peeks(2)
                if pv == None:
                    break
                if pv == '%>':
                    data += "%>"
                    break
                data += s.read()
            s.reads(2)  # consume the closing '%>' (no-op at EOF)
            tokens.append(Token(data, s.line_number, s.line_offset))
        elif c == '<' and nc == '!' and nc1 == "-" and nc2 == "-":
            # '<!-- ... -->' comment captured as one token.
            data = "<!--"
            s.read()
            s.read()
            s.read()
            while True:
                pv = s.peeks(3)
                if pv == None:
                    break
                if pv == '-->':
                    data += "-->"
                    break
                data += s.read()
            s.reads(3)  # consume the closing '-->' (no-op at EOF)
            tokens.append(Token(data, s.line_number, s.line_offset))
        elif c == '\'' or c == '\"':
            # Quoted string honoring backslash escapes.  Both quote styles
            # share this branch (the original duplicated it verbatim).
            quote = c
            data = c
            if s.peeked:
                s.peeked = False
                s.peek_count = 0
            while True:
                c = s.peek()
                if c == None:
                    break
                if c == '\\':
                    data += s.read()
                    esc = s.read()
                    if esc == None:
                        # was: `data += s.read()` crashed (str += None) on a
                        # string ending in a lone backslash at EOF.
                        break
                    data += esc
                elif c == quote:
                    data += s.read()
                    break
                else:
                    data += s.read()
            tokens.append(Token(data, s.line_number, s.line_offset))
        else:
            data = c
            if s.peeked:
                s.peeked = False
                s.peek_count = 0
            if THelper.is_operator(data):
                # Greedily extend to the longest recognized operator.
                pv = s.peek()
                if pv == None:
                    tokens.append(Token(data, s.line_number, s.line_offset))
                    break
                else:
                    while THelper.is_operator(data + pv):
                        data += s.read()
                        pv = s.peek()
                        if pv == None:
                            break
                tokens.append(Token(data, s.line_number, s.line_offset))
            elif THelper.is_sp_char(data):
                tokens.append(Token(data, s.line_number, s.line_offset))
            else:
                # Plain word: accumulate until a delimiter or EOF.
                c = s.peek()
                if c == None:
                    tokens.append(Token(data, s.line_number, s.line_offset))
                    break
                while c != None and not (THelper.is_sp_char(c) or THelper.is_operator(c) or THelper.is_whitespace(c)):
                    # `c != None` guard: the original crashed at EOF because
                    # is_operator(None) raised TypeError (len(None)) whenever
                    # input ended with a multi-char word, e.g. "ab".
                    s.read()
                    data += c
                    c = s.peek()
                tokens.append(Token(data, s.line_number, s.line_offset))

    return tokens

if __name__ == "__main__":
    # Tokenize the file named on the command line and print each token.
    tokens = tokenize(sys.argv[1], True)
    for t in tokens:
        # print() call form: the original used the Python-2-only statement
        # form, a SyntaxError under Python 3.
        print(t.data)
    sys.exit(0)
    # --- Everything below is unreachable (after sys.exit) but kept as the
    # --- original's token-frequency debugging dump, ported to Python 3.
    h = {}
    for t in tokens:
        if t.data in h:  # dict.has_key() was removed in Python 3
            h[t.data] += 1
        else:
            h[t.data] = 1  # was 0: the first occurrence went uncounted
    for k in h:
        if h[k] < 10:
            continue
        if k.startswith("\""):
            print(k + ",", h[k])
        else:
            print("\"" + k + "\",", h[k])
